#!/usr/bin/Rscript
##########################################################################################
# Social Media and Policy Responses to the COVID-19 Pandemic in Switzerland
##########################################################################################
# Description:
##########################################################################################
#  Figure 1
##########################################################################################
# Contents
##########################################################################################
# 1) Dependencies
# 2) Load all necessary Things
# 3) Load curated data
# 4) Figure 1
##########################################################################################
# 1) Dependencies
##########################################################################################
library(dplyr)
library(tidyr)
library(tidyverse)
library(data.table)
library(readr)
library(lubridate)
library(ggplot2)
library(graphlayouts)
library(scales)
library(purrr)
library(magrittr)
library(sysfonts)
library(cowplot)
##########################################################################################
# 2) Load all necessary Things
##########################################################################################
# Start from an empty global environment (attached packages are not affected).
rm(list = ls())

# - set dir
# Locate this script so that the relative paths used below resolve against
# the script's own directory. When the script is launched via Rscript the
# interpreter arguments contain a `--file=<path>` entry; otherwise the path
# is requested from the RStudio source editor.
args <- commandArgs()

scriptName <- args[startsWith(args, "--file=")]

if (length(scriptName) == 0) {
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  # Strip the "--file=" prefix; [1] guards against a repeated argument.
  scriptName <- sub("^--file=", "", scriptName[1])
}

# dirname() also handles a bare file name (`Rscript figure_one.R` yields "."),
# where the previous regex-based split produced NA and setwd() failed.
# The trailing separator of the previous implementation is kept.
pathName <- paste0(dirname(scriptName), "/")

setwd(pathName)
parent_path <- getwd()

# - register the Google fonts used in the plots (family name == font name)
invisible(lapply(
  c("Montserrat", "Roboto"),
  function(font_name) font_add_google(font_name, font_name)
))

# load the ggplot theme of the DigDemLab; messages and warnings raised while
# sourcing the file are silenced
suppressMessages(
  suppressWarnings(
    source("ggplot_theme_ddl.R")
  )
)
# - model-configurations to consider
# Each entry is a character vector of language codes.
langs <- list(
  c("de"),
  c("fr"),
  c("it"),
  c("en"),
  c("de", "fr", "it"),
  c("de", "fr", "it", "en"),
  c("all")
)

# One underscore-joined identifier per configuration.
langusges_l <- c("de", "fr", "it", "en", "de_fr_it", "de_fr_it_en", "alle")

# English label suffixes. A German variant used to be assigned immediately
# before this line and was overwritten right away; only the effective value
# is kept.
# NOTE(review): the single-language entries are ordered German, English,
# French, Italian, while `langs` / `langusges_l` are ordered de, fr, it, en.
# Confirm the intended order before indexing these vectors in parallel.
lang_long <- c(
  " in German", " in English", " in French", " in Italian",
  " in DE & FR & IT", " in DE & FR & IT & EN", ""
)

# Named colours (English names; the German-named list that preceded it was a
# dead store and has been dropped).
colorlist1 <- list(
  `Rest` = "#BFA5A8",
  `Fed. Gov. & Cantons` = "#DD2461",
  `Media (De)` = "#4B8178",
  `Media (Fr)` = "#008E7A",
  `Parties` = "#574144",
  `Media (It)` = "#32BEAE"
)
fillcolor1 <- unlist(colorlist1)

# Upper date limit, kept as an ISO date string.
lim_max <- "2020-08-20"
##########################################################################################
# 3) Load curated data
##########################################################################################
# - load data:
# Every source is cut to the same window (both bounds exclusive) and to rows
# with `la == "de"`.
study_start <- as.Date("2020-02-21")
study_end <- as.Date("2020-08-23")

# Twitter: `Datum` is converted to Date before the window is applied.
tdf <- readRDS("../data/Twitter_data_minified.RDS") %>%
  dplyr::mutate(Datum = as.Date(Datum)) %>%
  dplyr::filter(Datum > study_start, Datum < study_end, la == "de")

# SMD_CDT data: `pubDateTime` is compared as stored, without conversion.
sdf <- readRDS("../data/SMD_CDT_data_minified.RDS") %>%
  dplyr::filter(
    pubDateTime > study_start & pubDateTime < study_end,
    la == "de"
  )

# Facebook: same treatment as the Twitter data.
fdf <- readRDS("../data/Facebook_data_minified.RDS") %>%
  dplyr::mutate(Datum = as.Date(Datum)) %>%
  dplyr::filter(Datum > study_start, Datum < study_end, la == "de")


# Normalise party names: lower-case them, keep only the whitelisted parties
# (plus rows without a party), then collapse the spellings to short labels.
# NOTE(review): "christdemokratische volkspartei der schweiz" and
# "christlichsoziale volkspartei oberwallis" are mapped to "CVP" below but are
# not part of the whitelist, so such rows are dropped before the mapping is
# applied. Confirm whether the whitelist should include them.
tdf <- tdf %>%
  dplyr::mutate(Party = tolower(Party)) %>%
  dplyr::filter(Party %in% c(
    "alternative - die grünen zug",
    "alternative-die grünen kanton zug",
    "bürgerlich-demokratische partei schweiz",
    "christlich-soziale partei",
    "christlichdemokratische volkspartei der schweiz",
    "christlichdemokratische volkspartei oberwallis",
    "fdp.die liberalen",
    "grüne (basels starke alternative)",
    "grüne partei der schweiz",
    "grünliberale partei",
    "na",
    "nd",
    "schweizerische volkspartei",
    "sozialdemokratische partei der schweiz",
    NA
  )) %>%
  dplyr::mutate(Party = dplyr::case_when(
    Party %in% c(
      "grüne (basels starke alternative)",
      "grüne partei der schweiz",
      "alternative - die grünen zug",
      "alternative-die grünen kanton zug"
    ) ~ "Grüne",
    Party %in% c("sozialdemokratische partei der schweiz") ~ "SP",
    Party %in% c("schweizerische volkspartei") ~ "SVP",
    Party %in% c("fdp.die liberalen") ~ "FDP",
    Party %in% c(
      "christdemokratische volkspartei der schweiz",
      "christlichdemokratische volkspartei der schweiz",
      "christlich-soziale partei",
      "christlichdemokratische volkspartei oberwallis",
      "christlichsoziale volkspartei oberwallis"
    ) ~ "CVP",
    Party %in% c("grünliberale partei") ~ "GLP",
    Party %in% c("bürgerlich-demokratische partei schweiz") ~ "BDP",
    # The placeholders "na"/"nd" become a real missing value directly,
    # instead of going through the string "NA" first.
    Party %in% c("na", "nd") ~ NA_character_,
    TRUE ~ Party
  ))

# - remove tweets from actors we are not interested in:
tdf <- tdf %>%
  dplyr::filter(!Akteur.Typ %in% c("Institute", "Gericht", "NA")) %>%
  # A party value is only accepted on "Party"/"Person" rows; any other actor
  # type is kept only when its party is missing.
  dplyr::filter(Akteur.Typ %in% c("Party", "Person") | is.na(Party)) %>%
  dplyr::mutate(
    Akteur.Typ = as.character(Akteur.Typ),
    # rows without an actor type are labelled "ND"
    Akteur.Typ = dplyr::coalesce(Akteur.Typ, "ND")
  )

# Collapse the actor type into the coarser grouping used in the figure.
# "Politican" (sic) is matched by later code and must keep this spelling.
tdf <- tdf %>%
  dplyr::mutate(Akteur_Art = dplyr::case_when(
    Akteur.Typ == "Party" ~ "Party",
    Akteur.Typ == "Media" ~ "Media",
    Akteur.Typ == "Person" ~ "Politican",
    Akteur.Typ %in% c("Administration", "Departement", "Bundesamt") ~ "Gov",
    Akteur.Typ %in% c("Organisation", "Komitee") ~ "Org",
    Akteur.Typ == "ND" ~ "SnowBallers",
    TRUE ~ "NA"
  ))

# - remove all retweets from tweets
# Switch for the retweet filter; TRUE/FALSE are spelled out because T/F are
# ordinary variables that can be reassigned.
retweet_out <- TRUE
if (isTRUE(retweet_out)) {
  # filter() also drops rows where the comparison evaluates to NA
  tdf <- tdf %>% dplyr::filter(Is_retweet != TRUE)
  unique(tdf$Is_retweet) # - check if all is in order
}

gc()

# - factorize topic
# The three data sets share one level order for `topic`.
topic_levels <- c("Covid19", "Masks", "App", "App & Masks", "Anderes")
tdf$topic <- factor(tdf$topic, levels = topic_levels)
sdf$topic <- factor(sdf$topic, levels = topic_levels)
fdf$topic <- factor(fdf$topic, levels = topic_levels)

# Nice Descriptive numbers:
# number of distinct users per actor group
tdf %>%
  group_by(Akteur_Art, User_id) %>%
  summarise(n = n()) %>%
  group_by(Akteur_Art) %>%
  summarise(n = n())
# number of tweets per actor group
tdf %>%
  group_by(Akteur_Art) %>%
  summarise(n = n())
##########################################################################################
# 4) Figure 1
##########################################################################################
# Policy events marked in the figure: `date` is a yyyymmdd string (parsed with
# ymd() where the plot is built), `lab` is the text printed next to the line.
# The object name shadows ggplot2::annotate(); it is kept because the plotting
# code refers to it by this name.
annotate <- data.frame(
  date = c("20200228", "20200316", "20200427", "20200511", "20200606", "20200622"),
  lab = c(
    "Switzerland bans all large gatherings",
    "Switzerland declares state of emergency",
    "First easing step by the government",
    "Second easing step by the government",
    "Third easing step by the government",
    "Fourth easing step by the government"
  ),
  stringsAsFactors = FALSE
)

# Twitter Data...
#-----------------------------------------------------------------------------------------
# Weekly topic counts and shares per actor group.
timeplot_df <- tdf %>%
  dplyr::filter(la %in% c("de")) %>%
  mutate(Datum = as.Date(Datum)) %>%
  dplyr::filter(Datum > as.Date("2020-02-21")) %>%
  mutate(day = lubridate::date(as.Date(Datum))) %>%
  # bucket each day into its week via cut(); the label is the week's first day
  mutate(week = lubridate::date(cut(day, "week"))) %>%
  group_by(Akteur_Art, week, topic) %>%
  summarise(count = n()) %>%
  # summarise() drops the last grouping level, so sum(count) below is the
  # total per actor group and week, and freq is the topic's share in percent
  mutate(
    freq = (count / sum(count) * 100),
    week = as.Date(week)
  ) %>%
  ungroup() %>%
  # add explicit zero rows for every week / topic / actor combination
  tidyr::complete(
    week = seq.Date(as.Date("2020-02-24"), max(week), by = "week"),
    topic, Akteur_Art,
    fill = list(freq = 0, count = 0)
  ) %>%
  dplyr::filter(topic %in% c("Covid19", "Masks", "App", "App & Masks")) %>%
  pivot_wider(
    names_from = topic, values_from = c(count, freq),
    values_fill = list(count = 0, freq = 0)
  ) %>%
  mutate(
    # "App or Masks" is the union of the three disjoint categories. It is
    # computed first, from the untouched columns: mutate() evaluates its
    # arguments in order, and adding the overlap after App and Masks had
    # already absorbed it counted "App & Masks" three times.
    `count_App or Masks` = count_App + count_Masks + `count_App & Masks`,
    `freq_App or Masks` = freq_App + freq_Masks + `freq_App & Masks`,
    # items tagged "App & Masks" count towards both single topics
    count_Masks = count_Masks + `count_App & Masks`,
    count_App = count_App + `count_App & Masks`,
    freq_Masks = freq_Masks + `freq_App & Masks`,
    freq_App = freq_App + `freq_App & Masks`
  ) %>%
  dplyr::select(-c("count_App & Masks", "freq_App & Masks")) %>%
  # back to long format: one row per week / actor / topic with count and freq
  pivot_longer(
    names_to = c(".value", "topic"), values_drop_na = FALSE, names_sep = "_",
    cols = c(
      count_Covid19, count_Masks, count_App, `count_App or Masks`,
      freq_Covid19, freq_Masks, freq_App, `freq_App or Masks`
    )
  )

timeplot_df$topic <- factor(
  timeplot_df$topic,
  levels = c("Covid19", "Masks", "App", "App or Masks")
)

# Filter Politicians and Parties
timeplot_df <- timeplot_df %>%
  dplyr::filter(Akteur_Art %in% c("Party", "Politican", "SnowBallers")) %>%
  dplyr::filter(topic %in% c("Covid19"))

# Descriptive for Plot
# One row per actor group with tweet and user counts.
actor_groups <- c("Party", "Politican", "SnowBallers")

# tweets per actor group
tmp_nan <- tdf %>%
  dplyr::filter(Datum > ymd("2020-02-21")) %>%
  dplyr::filter(Akteur_Art %in% actor_groups) %>%
  group_by(Akteur_Art) %>%
  summarise(n = n())

# distinct users per actor group
tmp_nam <- tdf %>%
  dplyr::filter(Datum > ymd("2020-02-21")) %>%
  dplyr::filter(Akteur_Art %in% actor_groups) %>%
  group_by(Akteur_Art) %>%
  dplyr::summarise(n = n_distinct(User_id))

timeplot_df_names <- data.frame(
  gruppe = actor_groups,
  # NOTE(review): as before, every row receives the total over all groups;
  # use the per-group values in `tmp_nam` if a per-group count is intended.
  n_users = sum(tmp_nam$n),
  # Align by group name rather than by row position, so a group without any
  # tweets yields 0 instead of a length-mismatch error or a shifted value.
  n_tweets = dplyr::coalesce(
    tmp_nan$n[match(actor_groups, tmp_nan$Akteur_Art)],
    0L
  ),
  stringsAsFactors = FALSE
)

# SMD Data...
#-----------------------------------------------------------------------------------------
# Weekly topic counts and shares for the newspaper data, computed per `la`.
# `sdf` was already restricted to la == "de" when it was loaded, so the
# language filter below currently keeps a single value.
timeplot_dt <- sdf %>%
  dplyr::filter(la %in% c("de", "fr", "it")) %>%
  dplyr::mutate(pubDateTime = as.Date(pubDateTime)) %>%
  dplyr::filter(pubDateTime > as.Date("2020-02-21")) %>%
  mutate(day = lubridate::date(ymd(pubDateTime))) %>%
  # bucket each day into its week via cut(); the label is the week's first day
  mutate(week = lubridate::date(as.Date(cut(day, "week")))) %>%
  group_by(la, week, topic) %>%
  summarise(count = n()) %>%
  # summarise() drops the last grouping level, so sum(count) below is the
  # total per `la` and week, and freq is the topic's share in percent
  mutate(
    freq = (count / sum(count) * 100),
    week = as.Date(week)
  ) %>%
  ungroup() %>%
  # add explicit zero rows for every week / topic and each observed `la`
  tidyr::complete(
    week = seq.Date(as.Date("2020-02-24"), max(week), by = "week"),
    nesting(la), topic,
    fill = list(freq = 0, count = 0)
  ) %>%
  dplyr::filter(topic %in% c("Covid19", "Masks", "App", "App & Masks")) %>%
  pivot_wider(
    names_from = topic, values_from = c(count, freq),
    values_fill = list(count = 0, freq = 0)
  ) %>%
  mutate(
    # "App or Masks" is the union of the three disjoint categories. It is
    # computed first, from the untouched columns: mutate() evaluates its
    # arguments in order, and adding the overlap after App and Masks had
    # already absorbed it counted "App & Masks" three times.
    `count_App or Masks` = count_App + count_Masks + `count_App & Masks`,
    `freq_App or Masks` = freq_App + freq_Masks + `freq_App & Masks`,
    # items tagged "App & Masks" count towards both single topics
    count_Masks = count_Masks + `count_App & Masks`,
    count_App = count_App + `count_App & Masks`,
    freq_Masks = freq_Masks + `freq_App & Masks`,
    freq_App = freq_App + `freq_App & Masks`
  ) %>%
  dplyr::select(-c(`count_App & Masks`, `freq_App & Masks`)) %>%
  # back to long format: one row per week / la / topic with count and freq
  pivot_longer(
    names_to = c(".value", "topic"), values_drop_na = FALSE, names_sep = "_",
    cols = c(
      count_Covid19, count_Masks, count_App, `count_App or Masks`,
      freq_Covid19, freq_Masks, freq_App, `freq_App or Masks`
    )
  )

timeplot_dt$topic <- factor(
  timeplot_dt$topic,
  levels = c("Covid19", "Masks", "App", "App or Masks")
)

# Label the whole series as "Media" and drop the language column.
timeplot_dt$Akteur_Art <- "Media"
timeplot_dt$la <- NULL

# Filter Topic
timeplot_dt <- timeplot_dt %>% dplyr::filter(topic %in% c("Covid19"))

# Descriptive numbers for the newspaper series, as a single "All" row.
# n_users counts the distinct values of `so` (presumably the source/outlet
# identifier; verify against the data dictionary).
tmp_nam <- sdf %>% dplyr::summarise(n = n_distinct(so))

timeplot_dt_names <- data.frame(
  langs = "All",
  n_users = sum(tmp_nam$n),
  n_tweets = nrow(filter(sdf, pubDateTime > ymd("2020-02-21"))),
  stringsAsFactors = FALSE
)

# Combine Data
#-----------------------------------------------------------------------------------------
# Stack the Twitter series and the newspaper series into one long table.
timeplot_df <- dplyr::bind_rows(timeplot_df, timeplot_dt)


# Rename Masks and App:
# The last key has to be "App or Masks", which is the level created upstream;
# the former "App or Mask" never matched any row.
timeplot_df <- timeplot_df %>%
  mutate(topic = as.character(topic)) %>%
  mutate(topic = case_when(
    topic %in% c("Masks") ~ "Face Masks",
    topic %in% c("Covid19") ~ "Covid19",
    topic %in% c("App") ~ "Covid App",
    topic %in% c("App or Masks") ~ "Covid App or Face Masks",
    TRUE ~ topic
  ))


# Display names of the four series; these strings become the facet titles.
timeplot_df$Akteur_Art <- dplyr::recode(
  timeplot_df$Akteur_Art,
  `Party` = "Tweets by parties",
  `Politican` = "Tweets by politicians",
  `SnowBallers` = "Tweets by attentive public",
  `Media` = "Newspapers"
)
# Figure One:
# Colours are keyed by series name. The previous unnamed vector was matched to
# the alphabetically sorted series, which gives exactly this assignment, so
# the rendered figure is unchanged. The accompanying `labels` vector was in a
# different order and would have mislabelled three series had the legend been
# shown; the legend is hidden, so it is dropped instead.
# (old colors "#DD2461","#F5B700","#249fdd","#04E762")
actor_colors <- c(
  "Newspapers" = "#DD2461",
  "Tweets by attentive public" = "#999999",
  "Tweets by parties" = "#E69F00",
  "Tweets by politicians" = "#0072B2"
)

timeplot_twitter_users <- ggplot(data = timeplot_df) +
  # weekly share per series, one facet each
  geom_line(aes(x = week, y = freq, color = Akteur_Art), alpha = .7, size = 1.2) +
  # event labels, rotated and placed 2.5 days left of their line
  geom_text(
    data = annotate,
    aes(x = ymd(date) - 2.5, y = 55, label = lab),
    hjust = "right", color = "black", angle = 90, size = 5.0
  ) +
  # vertical line at each policy event
  geom_vline(
    data = annotate,
    aes(xintercept = ymd(date)),
    color = "black"
  ) +
  scale_x_date(
    date_breaks = "months",
    limits = c(as.Date("2020-02-24"), as.Date(lim_max)),
    labels = date_format("%b")
  ) +
  scale_color_manual(values = actor_colors) +
  labs(
    y = 'Share [%]', x = 'Date',
    color = 'Group of Users: '
  ) +
  facet_wrap(~Akteur_Art, ncol = 1) +
  ddl_theme() +
  theme(
    legend.position = "none",
    strip.background = element_blank(), strip.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 0, hjust = .5, vjust = 1, size = 16, color = "black"),
    axis.text.y = element_text(hjust = .5, size = 16, color = "black"),
    strip.text.x = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 16, color = "black"),
    plot.title = element_text(size = 20, color = "black"),
    legend.text = element_text(size = 16, color = "black"),
    plot.margin = unit(c(.5, 1.3, .5, .5), "cm"),
    legend.key.size = unit(1.5, "line"),
    axis.line.x = element_line(color = "black", size = .5),
    axis.line.y = element_line(color = "black", size = .5)
  )

timeplot_twitter_users

# Pass the plot explicitly instead of relying on ggplot2's "last plot" state.
ggsave(
  "../images/figure_one.pdf",
  plot = timeplot_twitter_users,
  width = 14, height = 19, device = cairo_pdf
)

